General

library(Seurat)
library(ggplot2)
library(magrittr)
library(dplyr)
library(tibble)
library(stringr)
library(modplots)
broad_order <- c("progenitors",
      "FP",
      "RP",
      "FP/RP",
      "neurons",
      "OPC",
      "MFOL",
      "pericytes",
      "microglia",
      "blood",
      "vasculature"
      )

Ctrl vs Lumbar

se_path <- c("Gg_ctrl_int_seurat_250723",
             "Gg_lumb_int_seurat_250723")

int_path <- "Gg_ctrl_lumb_int_seurat_250723"
my.meta <- list()
annot <- list()
for (i in seq(se_path)) {
  # load the data sets
  my.se <- readRDS(paste0("~/spinal_cord_paper/data/", se_path[i], ".rds"))
  annot[[i]] <- read.csv(list.files("~/spinal_cord_paper/annotations",
                               pattern = str_remove(se_path[i], "_seurat_\\d{6}"),
                               full.names = TRUE))
  
  if(length(table(annot[[i]]$number)) != length(table(my.se$seurat_clusters))) {
     stop("Number of clusters must be identical!")
  }
  
  # rename for left join
  annot[[i]] <- annot[[i]] %>% 
    mutate(fine = paste(fine, number, sep = "_")) %>% 
    mutate(number = factor(number, levels = 1:nrow(annot[[i]]))) %>% 
    rename(seurat_clusters = number)
  
  ord_levels <- annot[[i]]$fine[order(match(annot[[i]]$broad, broad_order))]
   
  # add cluster annotation to meta data
  my.meta[[i]] <- my.se@meta.data %>% 
    rownames_to_column("rowname") %>% 
    left_join(annot[[i]], by = "seurat_clusters") %>% 
    mutate(fine = factor(fine, levels = ord_levels)) %>% 
    mutate(seurat_clusters = factor(seurat_clusters, levels = str_extract(ord_levels, "\\d{1,2}$"))) %>% 
    column_to_rownames("rowname")
}

rm(my.se)
my.se <- readRDS(paste0("~/spinal_cord_paper/data/", int_path, ".rds"))
  annot_int <- read.csv(list.files("~/spinal_cord_paper/annotations",
                               pattern = str_remove(int_path, "_seurat_\\d{6}"),
                               full.names = TRUE))
  
  if(length(table(annot_int$number)) != length(table(my.se$seurat_clusters))) {
     stop("Number of clusters must be identical!")
  }
  
  # rename for left join
  annot_int <- annot_int %>% 
    mutate(fine = paste(fine, number, sep = "_")) %>% 
    mutate(number = factor(number, levels = 1:nrow(annot_int))) %>% 
    rename(seurat_clusters = number)
  
  ord_levels <- annot_int$fine[order(match(annot_int$broad, broad_order))]
   
  # add cluster annotation to meta data
  my.se@meta.data <- my.se@meta.data %>% 
    rownames_to_column("rowname") %>% 
    left_join(annot_int, by = "seurat_clusters") %>% 
    mutate(fine = factor(fine, levels = ord_levels)) %>% 
    mutate(seurat_clusters = factor(seurat_clusters, levels = str_extract(ord_levels, "\\d{1,2}$"))) %>% 
    column_to_rownames("rowname")

Dimplot

clust_col <- read.csv("~/spinal_cord_paper/annotations/broad_cluster_marker_colors.csv")

broad_cols <- clust_col %>% 
    filter(broad_cluster %in% annot_int$broad) %>% 
    pull(color)


DimPlot(my.se, reduction = "tsne", group.by = "broad", label = TRUE, pt.size = 0.5, cols = broad_cols)

transfer cluster labels

The cell IDs of the poly samples got the appendices _3 and _4. Therefore the labels from the previous integration of poly 1 and 2 get switched to 3 and 4, so the cluster annotations can be transfered.

table(stringi::stri_sub(rownames(my.meta[[1]]), -2,-1))
## 
##   _1   _2 
## 2474 5537
table(stringi::stri_sub(rownames(my.meta[[2]]), -2,-1))
## 
##   _1   _2 
## 2487 4599
table(stringi::stri_sub(rownames(my.se@meta.data), -2,-1))
## 
##   _1   _2   _3   _4 
## 2474 5537 2487 4599
my.meta[[2]] <- my.meta[[2]] %>%
  tibble::rownames_to_column("cell_ID") %>%
  dplyr::mutate(cell_ID = str_replace(cell_ID, "_1", "_3")) %>%
  dplyr::mutate(cell_ID = str_replace(cell_ID, "_2", "_4") ) %>%
  tibble::column_to_rownames("cell_ID")

identical(c(rownames(my.meta[[1]]), rownames(my.meta[[2]])), rownames(my.se@meta.data))
## [1] TRUE
cell_annot <- rbind(my.meta[[1]], my.meta[[2]]) %>%
  dplyr::mutate(sample = substr(orig.ident, 4, 7)) %>%
  dplyr::mutate(annot_sample = paste(fine, sample, sep = "_")) %>%
  dplyr::mutate(broad_sample = paste(broad, sample, sep = "_"))

my.se <- AddMetaData(my.se, cell_annot[, c("sample","annot_sample", "broad_sample")]) 
brach_poly_combined_labels <- cell_annot[, c("sample","annot_sample", "broad_sample")]

saveRDS(brach_poly_combined_labels, file = "~/spinal_cord_paper/annotations/ctrl_lumb_int_combined_labels.rds")
p1 <- DimPlot(
  my.se,
  group.by = "annot_sample",
  reduction = "tsne",
  label = TRUE,
  repel = TRUE
  ) +
  NoLegend()

p2 <- DimPlot(
  my.se,
  group.by = "annot_sample",
  reduction = "tsne",
  label = FALSE,
  split.by = "sample",
  repel = TRUE
  ) +
  NoLegend()

p3 <- DimPlot(
  my.se,
  group.by = "broad_sample",
  reduction = "tsne",
  label = TRUE,
  repel = TRUE
  )

p1 + p2 + p3

## Sister Pair DE

Here we run DE analasys between terminal sister pairs of different origin from the heatmap in spinal_cord_paper/markdown/heatmap_spearman_ctrl_lumb_poly_int.html.

# sister pairs
sis_pairs <- data.frame(
  "ctrl" = c(16, 6, 20, 11, 8),
  "lumb" = c(24, 20, 20, 15, 15)
) 

# cell type annotations
sis_ctrl <- annot[[1]][sis_pairs$ctrl,] %>%
  droplevels() %>%
  mutate(fine = paste0(fine, "_ctrl"))
sis_lumb <- annot[[2]][sis_pairs$lumb,] %>%
  droplevels() %>%
  mutate(fine = paste0(fine, "_lumb"))


sis_annot <- cbind(sis_ctrl, sis_lumb) %>% tibble::remove_rownames()
colnames(sis_annot) <- paste0(c(rep("ctrl_",3),rep("lumb_",3)),
                             colnames(sis_annot))

sis_annot
Idents(my.se) <- "annot_sample"

sis_markers <- list()

for (i in seq(nrow(sis_annot))) {
  sis_markers[[i]] <- FindMarkers(
    my.se,
    ident.1 = sis_annot[i, "ctrl_fine"],
    ident.2 = sis_annot[i, "lumb_fine"],
    assay = "RNA",
    verbose = FALSE,
    only.pos = FALSE,
    min.pct = 0.25,
    logfc.threshold =  0.25,
    latent.vars = c("CC.Difference.seurat"),
    test.use = "MAST"
  ) %>%
    tibble::rownames_to_column("Gene.stable.ID") %>%
    dplyr::left_join(gnames, by = "Gene.stable.ID") %>%
    dplyr::arrange(-avg_log2FC) %>%
    dplyr::filter(p_val_adj < 0.05) %>%
    dplyr::filter(abs(avg_log2FC) > 0.5) %>%
  dplyr::mutate(delta_pct = abs(pct.1 - pct.2))
}

names(sis_markers) <- paste0(sis_annot$ctrl_fine, "-vs-", sis_annot$lumb_fine)

saveRDS(sis_markers, file = "~/spinal_cord_paper/data/Gg_ctrl_lumb_sis_markers.rds")

Ctrl vs Poly

se_path <- c("Gg_ctrl_int_seurat_250723",
             "Gg_poly_int_seurat_250723")

int_path <- "Gg_ctrl_poly_int_seurat_250723"
my.meta <- list()
annot <- list()
for (i in seq(se_path)) {
  # load the data sets
  my.se <- readRDS(paste0("~/spinal_cord_paper/data/", se_path[i], ".rds"))
  annot[[i]] <- read.csv(list.files("~/spinal_cord_paper/annotations",
                               pattern = str_remove(se_path[i], "_seurat_\\d{6}"),
                               full.names = TRUE))
  
  if(length(table(annot[[i]]$number)) != length(table(my.se$seurat_clusters))) {
     stop("Number of clusters must be identical!")
  }
  
  # rename for left join
  annot[[i]] <- annot[[i]] %>% 
    mutate(fine = paste(fine, number, sep = "_")) %>% 
    mutate(number = factor(number, levels = 1:nrow(annot[[i]]))) %>% 
    rename(seurat_clusters = number)
  
  ord_levels <- annot[[i]]$fine[order(match(annot[[i]]$broad, broad_order))]
   
  # add cluster annotation to meta data
  my.meta[[i]] <- my.se@meta.data %>% 
    rownames_to_column("rowname") %>% 
    left_join(annot[[i]], by = "seurat_clusters") %>% 
    mutate(fine = factor(fine, levels = ord_levels)) %>% 
    mutate(seurat_clusters = factor(seurat_clusters, levels = str_extract(ord_levels, "\\d{1,2}$"))) %>% 
    column_to_rownames("rowname")
}

rm(my.se)
my.se <- readRDS(paste0("~/spinal_cord_paper/data/", int_path, ".rds"))
  annot_int <- read.csv(list.files("~/spinal_cord_paper/annotations",
                               pattern = str_remove(int_path, "_seurat_\\d{6}"),
                               full.names = TRUE))
  
  if(length(table(annot_int$number)) != length(table(my.se$seurat_clusters))) {
     stop("Number of clusters must be identical!")
  }
  
  # rename for left join
  annot_int <- annot_int %>% 
    mutate(fine = paste(fine, number, sep = "_")) %>% 
    mutate(number = factor(number, levels = 1:nrow(annot_int))) %>% 
    rename(seurat_clusters = number)
  
  ord_levels <- annot_int$fine[order(match(annot_int$broad, broad_order))]
   
  # add cluster annotation to meta data
  my.se@meta.data <- my.se@meta.data %>% 
    rownames_to_column("rowname") %>% 
    left_join(annot_int, by = "seurat_clusters") %>% 
    mutate(fine = factor(fine, levels = ord_levels)) %>% 
    mutate(seurat_clusters = factor(seurat_clusters, levels = str_extract(ord_levels, "\\d{1,2}$"))) %>% 
    column_to_rownames("rowname")

Dimplot

broad_cols <- clust_col %>% 
    filter(broad_cluster %in% annot_int$broad) %>% 
    pull(color)


DimPlot(my.se, reduction = "tsne", group.by = "broad", label = TRUE, pt.size = 0.5, cols = broad_cols)

transfer cluster labels

The cell IDs of the poly samples got the appendices _3 and _4. Therefore the labels from the previous integration of poly 1 and 2 get switched to 3 and 4, so the cluster annotations can be transfered.

table(stringi::stri_sub(rownames(my.meta[[1]]), -2,-1))
## 
##   _1   _2 
## 2474 5537
table(stringi::stri_sub(rownames(my.meta[[2]]), -2,-1))
## 
##   _1   _2 
## 2592 4334
table(stringi::stri_sub(rownames(my.se@meta.data), -2,-1))
## 
##   _1   _2   _3   _4 
## 2474 5537 2592 4334
my.meta[[2]] <- my.meta[[2]] %>%
  tibble::rownames_to_column("cell_ID") %>%
  dplyr::mutate(cell_ID = str_replace(cell_ID, "_1", "_3")) %>%
  dplyr::mutate(cell_ID = str_replace(cell_ID, "_2", "_4") ) %>%
  tibble::column_to_rownames("cell_ID")

identical(c(rownames(my.meta[[1]]), rownames(my.meta[[2]])), rownames(my.se@meta.data))
## [1] TRUE
cell_annot <- rbind(my.meta[[1]], my.meta[[2]]) %>%
  dplyr::mutate(sample = substr(orig.ident, 4, 7)) %>%
  dplyr::mutate(annot_sample = paste(fine, sample, sep = "_")) %>%
  dplyr::mutate(broad_sample = paste(broad, sample, sep = "_"))

my.se <- AddMetaData(my.se, cell_annot[, c("sample","annot_sample", "broad_sample")]) 
brach_poly_combined_labels <- cell_annot[, c("sample","annot_sample", "broad_sample")]

saveRDS(brach_poly_combined_labels, file = "~/spinal_cord_paper/annotations/ctrl_poly_int_combined_labels.rds")
p1 <- DimPlot(
  my.se,
  group.by = "annot_sample",
  reduction = "tsne",
  label = TRUE,
  repel = TRUE
  ) +
  NoLegend()

p2 <- DimPlot(
  my.se,
  group.by = "annot_sample",
  reduction = "tsne",
  label = FALSE,
  split.by = "sample",
  repel = TRUE
  ) +
  NoLegend()

p3 <- DimPlot(
  my.se,
  group.by = "broad_sample",
  reduction = "tsne",
  label = TRUE,
  repel = TRUE
  )

p1 + p2 + p3

## Sister Pair DE

Here we run DE analasys between terminal sister pairs of different origin from the heatmap in spinal_cord_paper/markdown/heatmap_spearman_ctrl_lumb_poly_int.html.

# sister pairs
sis_pairs <- data.frame(
  "ctrl" = c(16, 16, 20, 11, 8, 6),
  "poly" = c(14, 26, 22, 15, 15, 6)
) 

# cell type annotations
sis_ctrl <- annot[[1]][sis_pairs$ctrl,] %>%
  droplevels() %>%
  mutate(fine = paste0(fine, "_ctrl"))
sis_poly <- annot[[2]][sis_pairs$poly,] %>%
  droplevels() %>%
  mutate(fine = paste0(fine, "_poly"))


sis_annot <- cbind(sis_ctrl, sis_poly) %>% tibble::remove_rownames()
colnames(sis_annot) <- paste0(c(rep("ctrl_",3),rep("poly_",3)),
                             colnames(sis_annot))

sis_annot
Idents(my.se) <- "annot_sample"

sis_markers <- list()

for (i in seq(nrow(sis_annot))) {
  sis_markers[[i]] <- FindMarkers(
    my.se,
    ident.1 = sis_annot[i, "ctrl_fine"],
    ident.2 = sis_annot[i, "poly_fine"],
    assay = "RNA",
    verbose = FALSE,
    only.pos = FALSE,
    min.pct = 0.25,
    logfc.threshold =  0.25,
    latent.vars = c("CC.Difference.seurat"),
    test.use = "MAST"
  ) %>%
    tibble::rownames_to_column("Gene.stable.ID") %>%
    dplyr::left_join(gnames, by = "Gene.stable.ID") %>%
    dplyr::arrange(-avg_log2FC) %>%
    dplyr::filter(p_val_adj < 0.05) %>%
    dplyr::filter(abs(avg_log2FC) > 0.5) %>%
  dplyr::mutate(delta_pct = abs(pct.1 - pct.2))
}

names(sis_markers) <- paste0(sis_annot$ctrl_fine, "-vs-", sis_annot$poly_fine)

saveRDS(sis_markers, file = "~/spinal_cord_paper/data/Gg_ctrl_poly_sis_markers.rds")
# Date and time of Rendering
Sys.time()
## [1] "2023-09-21 17:03:28 CEST"
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
## 
## Matrix products: default
## BLAS/LAPACK: /scicore/soft/apps/OpenBLAS/0.3.1-GCC-7.3.0-2.30/lib/libopenblas_sandybridgep-r0.3.1.so
## 
## locale:
## [1] en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] modplots_1.0.0     stringr_1.4.0      tibble_3.1.8       dplyr_1.0.10      
## [5] magrittr_2.0.1     ggplot2_3.3.3      SeuratObject_4.0.2 Seurat_4.0.5      
## 
## loaded via a namespace (and not attached):
##   [1] plyr_1.8.6                  igraph_1.2.6               
##   [3] lazyeval_0.2.2              sp_1.4-5                   
##   [5] splines_4.1.0               listenv_0.8.0              
##   [7] scattermore_0.7             GenomeInfoDb_1.28.0        
##   [9] digest_0.6.27               htmltools_0.5.1.1          
##  [11] fansi_0.5.0                 memoise_2.0.0              
##  [13] tensor_1.5                  cluster_2.1.2              
##  [15] ROCR_1.0-11                 globals_0.16.2             
##  [17] Biostrings_2.60.0           matrixStats_0.58.0         
##  [19] spatstat.sparse_3.0-0       prettyunits_1.1.1          
##  [21] colorspace_2.0-1            blob_1.2.1                 
##  [23] ggrepel_0.9.1               xfun_0.34                  
##  [25] crayon_1.4.1                RCurl_1.98-1.3             
##  [27] jsonlite_1.7.2              spatstat.data_3.0-0        
##  [29] survival_3.2-11             zoo_1.8-9                  
##  [31] glue_1.6.2                  polyclip_1.10-0            
##  [33] gtable_0.3.0                zlibbioc_1.38.0            
##  [35] XVector_0.32.0              leiden_0.3.9               
##  [37] DelayedArray_0.18.0         SingleCellExperiment_1.14.1
##  [39] future.apply_1.7.0          BiocGenerics_0.38.0        
##  [41] abind_1.4-5                 scales_1.1.1               
##  [43] pheatmap_1.0.12             DBI_1.1.1                  
##  [45] miniUI_0.1.1.1              Rcpp_1.0.7                 
##  [47] progress_1.2.2              viridisLite_0.4.0          
##  [49] xtable_1.8-4                reticulate_1.22            
##  [51] spatstat.core_2.1-2         bit_4.0.4                  
##  [53] stats4_4.1.0                htmlwidgets_1.5.3          
##  [55] httr_1.4.2                  RColorBrewer_1.1-2         
##  [57] ellipsis_0.3.2              ica_1.0-2                  
##  [59] farver_2.1.0                pkgconfig_2.0.3            
##  [61] sass_0.4.0                  uwot_0.1.10                
##  [63] deldir_1.0-6                utf8_1.2.1                 
##  [65] labeling_0.4.2              tidyselect_1.2.0           
##  [67] rlang_1.0.6                 reshape2_1.4.4             
##  [69] later_1.2.0                 AnnotationDbi_1.54.0       
##  [71] munsell_0.5.0               tools_4.1.0                
##  [73] cachem_1.0.5                cli_3.4.1                  
##  [75] generics_0.1.3              RSQLite_2.2.7              
##  [77] ggridges_0.5.3              org.Gg.eg.db_3.13.0        
##  [79] evaluate_0.20               fastmap_1.1.0              
##  [81] yaml_2.2.1                  goftest_1.2-2              
##  [83] knitr_1.41                  bit64_4.0.5                
##  [85] fitdistrplus_1.1-6          purrr_0.3.4                
##  [87] RANN_2.6.1                  KEGGREST_1.32.0            
##  [89] pbapply_1.4-3               future_1.30.0              
##  [91] nlme_3.1-152                mime_0.10                  
##  [93] compiler_4.1.0              plotly_4.10.0              
##  [95] png_0.1-7                   spatstat.utils_3.0-1       
##  [97] bslib_0.2.5.1               stringi_1.6.2              
##  [99] highr_0.9                   lattice_0.20-44            
## [101] Matrix_1.3-3                vctrs_0.5.1                
## [103] pillar_1.8.1                lifecycle_1.0.3            
## [105] spatstat.geom_3.0-3         lmtest_0.9-38              
## [107] jquerylib_0.1.4             RcppAnnoy_0.0.19           
## [109] data.table_1.14.0           cowplot_1.1.1              
## [111] bitops_1.0-7                irlba_2.3.3                
## [113] GenomicRanges_1.44.0        httpuv_1.6.1               
## [115] patchwork_1.1.1             R6_2.5.0                   
## [117] promises_1.2.0.1            KernSmooth_2.23-20         
## [119] gridExtra_2.3               IRanges_2.26.0             
## [121] parallelly_1.33.0           codetools_0.2-18           
## [123] MASS_7.3-54                 assertthat_0.2.1           
## [125] MAST_1.18.0                 SummarizedExperiment_1.22.0
## [127] withr_2.4.2                 sctransform_0.3.3          
## [129] S4Vectors_0.30.0            GenomeInfoDbData_1.2.6     
## [131] hms_1.1.0                   mgcv_1.8-35                
## [133] parallel_4.1.0              grid_4.1.0                 
## [135] rpart_4.1-15                tidyr_1.1.3                
## [137] rmarkdown_2.17              MatrixGenerics_1.4.0       
## [139] Rtsne_0.15                  Biobase_2.52.0             
## [141] shiny_1.6.0